Code
library (dplyr) ; library (tidyr) ; library (ggplot2)
# library(doParallel) ; library(foreach) ; library(doSNOW)
root <- rprojroot:: has_file (".git/index" )
datadir = root$ find_file ("data" )
funsdir = root$ find_file ("functions" )
savingdir = root$ find_file ("saved_files" )
savingdirOceanSlices = root$ find_file ("saved_files_oceanSlices" )
savingdirBinary = root$ find_file ("saved_files_binary" )
datapath = root$ find_file (paste0 (datadir,'/' ,'grump.phaeocystis_asv_long.csv' ))
files_vec <- list.files (funsdir)
for ( i in 1 : length (files_vec)){
source (root$ find_file (paste0 (funsdir,'/' ,files_vec[i])))
}
dframe = data.table:: fread (input = datapath) %>% filter (Cruise!= "MOSAiC" ,Raw.Sequence.Counts> 0 )
dframe_allASVs = tidy_grump (Dframe = dframe,binnary = T)
useStoredFiles = T
The idea is to create a ‘custom’ distance matrix, to induce the grouppings.
\(c_1\) is within the same species - but no unassigned
\(c_2\) is between assigned species
\(c_3\) is between species and unassigned
\(c_4\) is within unassigned and unassigned
For this document we will use only the following configuration:
\(c_1=1\) , \(c_2=1000\) , \(c_3=10\) , \(c_4=10\)
A priori, the ‘probability’ of unassigned ASVs to agglomerate between themselves is the same as if it was to agglomerate with other species. Large \(c_2\) makes it harder for known species ASVs to group between themselves.
Code
inducedDist_ = inducedDist (
dFrame = dframe_allASVs$ dframe,
c1 = 1 ,c2= 1000 ,c3= 10 ,c4= 10 ,
compMatrix = dframe_allASVs$ ASV_composition %>% data.frame ())
If we exclude the ASVs that appears only one ~or two~ times, does it help?
Code
nASVs <- dframe_allASVs$ dframe %>% select (ASV_name) %>% distinct () %>% nrow ()
plot0s_summ = dframe_allASVs$ dframe %>% group_by (ASV_name,SampleKey) %>%
summarise (Freq= n ()) %>%
mutate (nSamplesObserved = sum (Freq)) %>%
arrange (nSamplesObserved) %>%
select (ASV_name,nSamplesObserved) %>% distinct () %>%
mutate (FreqNSamples= cut (nSamplesObserved,breaks = c (0 ,1 ,2 ,3 ,5 ,10 ,25 ,50 ,100 ,250 ,1500 ),right = T,include.lowest = T)) %>%
group_by (FreqNSamples) %>%
summarise (Freq= n (),Pct= n ()/ nASVs) %>%
mutate (CummPct = cumsum (Pct))
### Creating vectors with id of ASVs to remove:
ASVs2Remove_df = dframe_allASVs$ dframe %>% group_by (ASV_name,SampleKey) %>%
summarise (Freq= n ()) %>%
mutate (nSamplesObserved = sum (Freq)) %>%
arrange (nSamplesObserved) %>%
select (ASV_name,nSamplesObserved) %>% distinct ()
vetASVs_2orMore = ASVs2Remove_df %>% filter (nSamplesObserved< 2 ) %>% select (ASV_name) %>% pull ()
vetASVs_3orMore = ASVs2Remove_df %>% filter (nSamplesObserved< 3 ) %>% select (ASV_name) %>% pull ()
saveRDS (vetASVs_2orMore,file = paste0 (savingdirOceanSlices,'/' ,'asvs_to_remove_oneSample' ))
saveRDS (vetASVs_3orMore,file = paste0 (savingdirOceanSlices,'/' ,'asvs_to_remove_twoSample' ))
nsamples = dframe_allASVs$ dframe %>% select (SampleID) %>% distinct () %>% nrow ()
plot0s_summ2 = dframe_allASVs$ dframe %>%
group_by (SampleKey) %>%
summarise (NumbersOfAsvInEachSample= n ()) %>%
arrange (NumbersOfAsvInEachSample) %>%
mutate (FreqAsvs= cut (NumbersOfAsvInEachSample,breaks = c (0 ,1 ,2 ,3 ,5 ,10 ,25 ,50 ,100 ,250 ,500 ),
right = T,include.lowest = T)) %>%
group_by (FreqAsvs) %>%
summarise (Freq= n (),Pct= n ()/ nsamples) %>%
mutate (CummPct = cumsum (Pct))
plot0s_summ %>% knitr:: kable (digits = 3 ) %>% kableExtra:: kable_styling (full_width = FALSE , position = "left" )
[0,1]
715
0.482
0.482
(1,2]
169
0.114
0.596
(2,3]
80
0.054
0.650
(3,5]
103
0.070
0.720
(5,10]
121
0.082
0.802
(10,25]
129
0.087
0.889
(25,50]
67
0.045
0.934
(50,100]
48
0.032
0.966
(100,250]
41
0.028
0.994
(250,1.5e+03]
9
0.006
1.000
Code
plot0s_summ2 %>% knitr:: kable (digits = 3 ) %>% kableExtra:: kable_styling (full_width = FALSE , position = "float_right" )
[0,1]
90
0.091
0.091
(1,2]
58
0.059
0.150
(2,3]
36
0.036
0.186
(3,5]
62
0.063
0.249
(5,10]
161
0.163
0.412
(10,25]
243
0.246
0.659
(25,50]
253
0.256
0.915
(50,100]
82
0.083
0.998
(100,250]
2
0.002
1.000
Lets now compare the distance matrix using mds, ltns, and umap.
I’m suggesting three different weight metrics:
We create the sample compositions, and add the RA of each ASV across all samples. I’m calling this as Sum_RA
Count in how many samples each ASV appeared. Denoting this metric as Sum_Apearance
Sum_SeqCounts = Sum of all RawSeqCounts for each ASVs.
Code
if (! useStoredFiles){
aDist = vegan:: vegdist (x = dframe_allASVs$ ASV_composition %>% select (- name) %>%
data.frame (),method = 'aitchison' )
coord_plots_all <- distMatrix_dimReduc_coords (distMatrix = as.matrix (aDist))
saveRDS (coord_plots_all,file = paste0 (savingdir,'/' ,'coord_plots_all' ))
}
coord_plots_all = readRDS (file = paste0 (savingdir,'/' ,'coord_plots_all' ))
dim_reduce_obj_all <- plot_dimReduc_coords (coord_plots_all,weights = df_weights$ Sum_RA,clusters = df_weights$ Species)
dim_reduce_obj_all$ MDS_2d
dim_reduce_obj_all$ nMDS_2d
dim_reduce_obj_all$ TSNE_2d
dim_reduce_obj_all$ umap_2d
Code
if (! useStoredFiles){
aDist = vegan:: vegdist (x = dframe2plus$ ASV_composition %>% select (- name) %>% data.frame (),method = 'aitchison' )
coord_plots_2ormore <- distMatrix_dimReduc_coords (distMatrix = as.matrix (aDist))
saveRDS (coord_plots_2ormore,file = paste0 (savingdir,'/' ,'coord_plots_2ormore' ))
}
coord_plots_2ormore = readRDS (file = paste0 (savingdir,'/' ,'coord_plots_2ormore' ))
dim_reduce_obj_2ormore <- plot_dimReduc_coords (
coord_plots_2ormore,weights = df_weights2plus$ Sum_RA,
clusters = df_weights2plus$ Species)
dim_reduce_obj_2ormore$ MDS_2d
dim_reduce_obj_2ormore$ nMDS_2d
dim_reduce_obj_2ormore$ TSNE_2d
dim_reduce_obj_2ormore$ umap_2d
Code
dim (dframe2plus$ ASV_composition)
We have 767 ASVs and 975 samples
Code
if (! useStoredFiles){
aDist = vegan:: vegdist (x = dframe3plus$ ASV_composition %>% select (- name) %>% data.frame (),method = 'aitchison' )
coord_plots_3ormore <- distMatrix_dimReduc_coords (distMatrix = as.matrix (aDist),perplexityTsne = 150 )
saveRDS (coord_plots_3ormore,file = paste0 (savingdir,'/' ,'coord_plots_3ormore' ))
}
coord_plots_3ormore = readRDS (file = paste0 (savingdir,'/' ,'coord_plots_3ormore' ))
dim_reduce_obj_3ormore <- plot_dimReduc_coords (
coord_plots_3ormore,
weights = df_weights3plus$ Sum_RA,clusters = df_weights3plus$ Species)
dim_reduce_obj_3ormore$ MDS_2d
dim_reduce_obj_3ormore$ nMDS_2d
dim_reduce_obj_3ormore$ TSNE_2d
dim_reduce_obj_3ormore$ umap_2d
Code
dim (dframe3plus$ ASV_composition)
598 ASVs and 970 samples.
It feels that removing rare ASVs won’t help..
Code
if (! useStoredFiles){
brayDist = vegan:: vegdist (x = dframe_allASVs$ ASV_composition %>% select (- name) %>%
data.frame (),method = 'bray' )
coord_plots_all_bray <- distMatrix_dimReduc_coords (distMatrix = as.matrix (brayDist))
saveRDS (coord_plots_all_bray,file = paste0 (savingdir,'/' ,'coord_plots_all_bray' ))
}
coord_plots_all_bray = readRDS (file = paste0 (savingdir,'/' ,'coord_plots_all_bray' ))
dim_reduce_obj_all_bray <- plot_dimReduc_coords (
coord_plots_all_bray,weights = df_weights$ Sum_RA,
clusters = df_weights$ Species)
dim_reduce_obj_all_bray$ MDS_2d
dim_reduce_obj_all_bray$ nMDS_2d
dim_reduce_obj_all_bray$ TSNE_2d
dim_reduce_obj_all_bray$ umap_2d
Code
if (! useStoredFiles){
brayDist = vegan:: vegdist (x = dframe2plus$ ASV_composition %>% select (- name) %>% data.frame (),method = 'bray' )
coord_plots_2ormore_bray <- distMatrix_dimReduc_coords (distMatrix = as.matrix (brayDist))
saveRDS (coord_plots_2ormore_bray,file = paste0 (savingdir,'/' ,'coord_plots_2ormore_bray' ))
}
coord_plots_2ormore_bray = readRDS (file = paste0 (savingdir,'/' ,'coord_plots_2ormore_bray' ))
dim_reduce_obj_2ormore_bray <- plot_dimReduc_coords (
coord_plots_2ormore_bray,
weights = df_weights2plus$ Sum_RA,
clusters = df_weights2plus$ Species)
dim_reduce_obj_2ormore_bray$ MDS_2d
dim_reduce_obj_2ormore_bray$ nMDS_2d
dim_reduce_obj_2ormore_bray$ TSNE_2d
dim_reduce_obj_2ormore_bray$ umap_2d
Code
dim (dframe2plus$ ASV_composition)
We have 767 ASVs and 975 samples
Code
if (! useStoredFiles){
brayDist = vegan:: vegdist (x = dframe3plus$ ASV_composition %>% select (- name) %>% data.frame (),method = 'bray' )
coord_plots_3ormore_bray <- distMatrix_dimReduc_coords (distMatrix = as.matrix (brayDist),perplexityTsne = 150 )
saveRDS (coord_plots_3ormore_bray,file = paste0 (savingdir,'/' ,'coord_plots_3ormore_bray' ))
}
coord_plots_3ormore_bray = readRDS (file = paste0 (savingdir,'/' ,'coord_plots_3ormore_bray' ))
dim_reduce_obj_3ormore_bray <- plot_dimReduc_coords (
coord_plots_3ormore_bray,
weights = df_weights3plus$ Sum_RA,
clusters = df_weights3plus$ Species)
dim_reduce_obj_3ormore_bray$ MDS_2d
dim_reduce_obj_3ormore_bray$ nMDS_2d
dim_reduce_obj_3ormore_bray$ TSNE_2d
dim_reduce_obj_3ormore_bray$ umap_2d
Code
dim (dframe3plus$ ASV_composition)
598 ASVs and 970 samples.
It feels that removing rare ASVs won’t help..
Code
if (! useStoredFiles){
hammDistMatrix = hammingDist (X = dframe_allASVs$ binnary_ASV_df %>% select (- ASV_name) %>% as.matrix ())
coord_plots_all_bin <- distMatrix_dimReduc_coords (distMatrix = as.matrix (hammDistMatrix),perplexityTsne = 150 )
saveRDS (coord_plots_all_bin,file = paste0 (savingdir,'/' ,'coord_plots_all_bin' ))
}
coord_plots_all_bin = readRDS (file = paste0 (savingdir,'/' ,'coord_plots_all_bin' ))
coord_plots_all_bin_plots <- plot_dimReduc_coords (
coord_plots_all_bin,
weights = df_weights$ Sum_RA,
clusters = df_weights$ Species)
coord_plots_all_bin_plots$ MDS_2d
coord_plots_all_bin_plots$ nMDS_2d
coord_plots_all_bin_plots$ TSNE_2d
coord_plots_all_bin_plots$ umap_2d
Code
if (! useStoredFiles){
hammDistMatrix = hammingDist (X = dframe2plus$ binnary_ASV_df %>% select (- ASV_name) %>% as.matrix ())
coord_plots_2ormore_bin <- distMatrix_dimReduc_coords (distMatrix = as.matrix (hammDistMatrix),perplexityTsne = 150 )
saveRDS (coord_plots_2ormore_bin,file = paste0 (savingdir,'/' ,'coord_plots_2ormore_bin' ))
}
coord_plots_2ormore_bin = readRDS (file = paste0 (savingdir,'/' ,'coord_plots_2ormore_bin' ))
coord_plots_2ormore_bin_plots <- plot_dimReduc_coords (
coord_plots_2ormore_bin,
weights = df_weights2plus$ Sum_RA,
clusters = df_weights2plus$ Species)
coord_plots_2ormore_bin_plots$ MDS_2d
coord_plots_2ormore_bin_plots$ nMDS_2d
coord_plots_2ormore_bin_plots$ TSNE_2d
coord_plots_2ormore_bin_plots$ umap_2d
We have 767 ASVs and 975 samples
Code
if (! useStoredFiles){
hammDistMatrix = hammingDist (X = dframe3plus$ binnary_ASV_df %>% select (- ASV_name) %>% as.matrix ())
coord_plots_3ormore_bin <- distMatrix_dimReduc_coords (distMatrix = as.matrix (hammDistMatrix),perplexityTsne = 150 )
saveRDS (coord_plots_3ormore_bin,file = paste0 (savingdir,'/' ,'coord_plots_3ormore_bin' ))
}
coord_plots_3ormore_bin = readRDS (file = paste0 (savingdir,'/' ,'coord_plots_3ormore_bin' ))
coord_plots_3ormore_bin_plots <- plot_dimReduc_coords (
coord_plots_3ormore_bin,
weights = df_weights3plus$ Sum_RA,
clusters = df_weights3plus$ Species)
coord_plots_3ormore_bin_plots$ MDS_2d
coord_plots_3ormore_bin_plots$ nMDS_2d
coord_plots_3ormore_bin_plots$ TSNE_2d
coord_plots_3ormore_bin_plots$ umap_2d
598 ASVs and 971 samples.
Can we create a mixture of those tow distances and see if a pattern emerge?
Code
if (! useStoredFiles){
hammDistMatrix = hammingDist (X = dframe_allASVs$ binnary_ASV_df %>% select (- ASV_name) %>% as.matrix ())
aitDistMatrix = robCompositions:: aDist (x = dframe_allASVs$ ASV_composition %>% select (- name) %>% data.frame ())
fifty_fifty_matrix = 0.5 * normalizeDistMatrix (hammDistMatrix) + 0.5 * normalizeDistMatrix (aitDistMatrix)
coord_plots_mixed_All <- distMatrix_dimReduc_coords (distMatrix = as.matrix (hammDistMatrix),perplexityTsne = 150 )
saveRDS (coord_plots_mixed_All,file = paste0 (savingdir,'/' ,'coord_plots_mixed_All' ))
}
coord_plots_mixed_All = readRDS (paste0 (savingdir,'/' ,'coord_plots_mixed_All' ))
coord_plots_mixed_All_plots <- plot_dimReduc_coords (coord_plots_mixed_All,weights = df_weights$ Sum_RA)
coord_plots_mixed_All_plots$ MDS_2d
coord_plots_mixed_All_plots$ nMDS_2d
coord_plots_mixed_All_plots$ TSNE_2d
coord_plots_mixed_All_plots$ umap_2d
Haven’t compared it yet, but could be and Idea…
Here the idea is to aggregate some samples. So we would have compositions of ASVs but instead of samples we would have a less sparse and with less dimensions. The most natural could be:
Compositions of ASVs, under longhurst provinces combined with different depths
Chunks of latitudes, and chunks of depths.
So for each aggregation we should take a look at mds, tsne, umap of the ait distances (or distance matrix using the CLR transformation)
So first lets do a bit of EDA to see what we can expect.
Code
dframe_allASVs$ dframe %>% select (SampleKey,Depth,Longhurst_Short) %>% distinct () %>%
ggplot (aes (Depth))+ geom_histogram ()
dframe_allASVs$ dframe %>% select (SampleKey,Depth,Longhurst_Short) %>% distinct () %>%
ggplot (aes (Depth))+ geom_boxplot ()
Let’s slice the dephts of the ocean in 10%.
Code
distinct_depth <- dframe_allASVs$ dframe %>% select (SampleKey,Depth) %>% distinct ()
qtls_0.1 = quantile (distinct_depth$ Depth, seq (0 ,1 ,0.1 ))
qtlsNames = 1 : (length (qtls_0.1 )- 1 )
qtlsNames = ifelse (qtlsNames< 10 ,paste0 ('DGR0' ,qtlsNames),paste0 ('DGR' ,qtlsNames))
qtls_0.1
0% 10% 20% 30% 40% 50% 60%
0.00000 1.18800 15.00000 28.80741 45.00000 61.50000 81.29718
70% 80% 90% 100%
105.47600 150.64307 335.50200 2568.80000
Here are the depth quantiles.
Code
distinct_depth = distinct_depth %>%
mutate (CatDepth= cut (Depth,breaks = qtls_0.1 ,include.lowest = T,right = T,labels = qtlsNames))
distinct_depth %>% group_by (CatDepth) %>%
summarise (Freq= n ())
# A tibble: 10 × 2
CatDepth Freq
<fct> <int>
1 DGR01 99
2 DGR02 115
3 DGR03 82
4 DGR04 112
5 DGR05 86
6 DGR06 98
7 DGR07 99
8 DGR08 98
9 DGR09 99
10 DGR10 99
Code
dframe_allASVs$ dframe %>% select (SampleKey,Depth,Longhurst_Short) %>% distinct () %>%
mutate (CatDepth= cut (Depth,breaks = qtls_0.1 ,include.lowest = T,right = T,labels = qtlsNames)) %>%
ggplot (aes (CatDepth))+ geom_bar ()+
facet_wrap (~ Longhurst_Short,scales= 'free_y' )
This is the distribution of number of samples for each GDR (group depth rank for each longhurst province.
So our first tentative will be to create compositions of asvs with this amount of combinations (GDRxLH)
Code
Lat_Depth_Composition_df= readRDS (file = paste0 (savingdir,'/' ,'Lat_Depth_Composition_df' ))
if (! useStoredFiles){
#hammDistMatrix = hammingDist(X = dframe_allASVs$binnary_ASV_df %>% select(-ASV_name) %>% as.matrix())
aitDistMatrix = robCompositions:: aDist (x = Lat_Depth_Composition_df %>% select (- name) %>% data.frame ())
Lat_Depth_ait_all_coord <- distMatrix_dimReduc_coords (distMatrix = as.matrix (aitDistMatrix),perplexityTsne = 150 )
saveRDS (Lat_Depth_ait_all_coord,file = paste0 (savingdir,'/' ,'Lat_Depth_ait_all_coord' ))
}
Lat_Depth_ait_all_coord = readRDS (file = paste0 (savingdir,'/' ,'Lat_Depth_ait_all_coord' ))
Lat_Depth_ait_all_coord_plots <- plot_dimReduc_coords (Lat_Depth_ait_all_coord)
Lat_Depth_ait_all_coord_plots$ MDS_2d
Code
Lat_Depth_ait_all_coord_plots$ nMDS_2d
Code
Lat_Depth_ait_all_coord_plots$ TSNE_2d
Code
Lat_Depth_ait_all_coord_plots$ umap_2d
Code
LH_Depth_Composition_df= readRDS (file = paste0 (savingdir,'/' ,'LH_Depth_Composition_df' ))
if (! useStoredFiles){
#hammDistMatrix = hammingDist(X = dframe_allASVs$binnary_ASV_df %>% select(-ASV_name) %>% as.matrix())
aitDistMatrix = robCompositions:: aDist (x = LH_Depth_Composition_df %>% select (- name) %>% data.frame ())
LH_Depth_ait_all_coord <- distMatrix_dimReduc_coords (distMatrix = as.matrix (aitDistMatrix),perplexityTsne = 150 )
saveRDS (LH_Depth_ait_all_coord,file = paste0 (savingdir,'/' ,'LH_Depth_ait_all_coord' ))
}
LH_Depth_ait_all_coord = readRDS (file = paste0 (savingdir,'/' ,'LH_Depth_ait_all_coord' ))
LH_Depth_ait_all_coord_plots <- plot_dimReduc_coords (LH_Depth_ait_all_coord)
LH_Depth_ait_all_coord_plots$ MDS_2d
LH_Depth_ait_all_coord_plots$ nMDS_2d
LH_Depth_ait_all_coord_plots$ TSNE_2d
LH_Depth_ait_all_coord_plots$ umap_2d
Code
## Varying Alpha
## Alpha == 0 ---------------------------------------------------------------------------------------------------------------------
running = F
if (running){
hammingDistMatrix = hammingDist (dframe_allASVs$ binnary_ASV_df %>% select (- ASV_name) %>% as.matrix ())
hammingDistMatrix = normalizeDistMatrix (hammingDistMatrix)
inducedDist_ = inducedDist (
dFrame = dframe_allASVs$ dframe,
c1 = 1 ,c2= 1000 ,c3= 10 ,c4= 10 ,
compMatrix = dframe_allASVs$ ASV_composition %>% data.frame ())
myDist = inducedDist_$ normalizedMyDist
hamming_clusters_alpha0 <- clusterize_ward_pam (
distMatrix = hammingDistMatrix,
maxnclust = 50 ,dFrameAsv = dframe_allASVs$ binnary_ASV_df$ ASV_name)
saveRDS (hamming_clusters_alpha0,file = paste0 (savingdir,'/' ,'hamming_clusters_alpha0' ))
## Varying Alpha
## Alpha == 0.1 ---------------------------------------------------------------------------------------------------------------------
alpha_ = 0.1
mydist = (1 - alpha_) * hammingDistMatrix + alpha_* inducedDist_$ normalizedMyDist
hamming_clusters_alpha0.1 <- clusterize_ward_pam (
distMatrix = mydist,
maxnclust = 50 ,dFrameAsv = dframe_allASVs$ binnary_ASV_df$ ASV_name)
saveRDS (hamming_clusters_alpha0.1 ,file = paste0 (savingdir,'/' ,'hamming_clusters_alpha0.1' ))
## Alpha == 0.25 ---------------------------------------------------------------------------------------------------------------------
alpha_ = 0.25
mydist = (1 - alpha_) * hammingDistMatrix + alpha_* inducedDist_$ normalizedMyDist
hamming_clusters_alpha0.25 <- clusterize_ward_pam (
distMatrix = mydist,
maxnclust = 50 ,dFrameAsv = dframe_allASVs$ binnary_ASV_df$ ASV_name)
saveRDS (hamming_clusters_alpha0.25 ,file = paste0 (savingdir,'/' ,'hamming_clusters_alpha0.25' ))
## Alpha == 0.35 ---------------------------------------------------------------------------------------------------------------------
alpha_ = 0.35
mydist = (1 - alpha_) * hammingDistMatrix + alpha_* inducedDist_$ normalizedMyDist
hamming_clusters_alpha0.35 <- clusterize_ward_pam (
distMatrix = mydist,
maxnclust = 50 ,dFrameAsv = dframe_allASVs$ binnary_ASV_df$ ASV_name)
saveRDS (hamming_clusters_alpha0.35 ,file = paste0 (savingdir,'/' ,'hamming_clusters_alpha0.35' ))
}
Code
running = F
if (running){
inducedDist_ = inducedDist (
dFrame = dframe2plus$ dframe,
c1 = 1 ,c2= 1000 ,c3= 10 ,c4= 10 ,
compMatrix = dframe2plus$ ASV_composition %>% data.frame ())
hammingDistMatrix = hammingDist (dframe2plus$ binnary_ASV_df %>% select (- ASV_name) %>% as.matrix ())
hammingDistMatrix = normalizeDistMatrix (hammingDistMatrix)
## Varying Alpha
## Alpha == 0 ---------------------------------------------------------------------------------------------------------------------
hamming_clusters_alpha0 <- clusterize_ward_pam (
distMatrix = hammingDistMatrix,
maxnclust = 50 ,dFrameAsv = dframe2plus$ binnary_ASV_df$ ASV_name)
saveRDS (hamming_clusters_alpha0,file = paste0 (savingdir,'/' ,'hamming_clusters_2plus_alpha0' ))
## Varying Alpha
## Alpha == 0.1 ---------------------------------------------------------------------------------------------------------------------
alpha_ = 0.1
mydist = (1 - alpha_) * hammingDistMatrix + alpha_* inducedDist_$ normalizedMyDist
hamming_clusters_alpha0.1 <- clusterize_ward_pam (
distMatrix = mydist,
maxnclust = 50 ,dFrameAsv = dframe2plus$ binnary_ASV_df$ ASV_name)
saveRDS (hamming_clusters_alpha0.1 ,file = paste0 (savingdir,'/' ,'hamming_clusters_2plus_alpha0.1' ))
## Alpha == 0.25 ---------------------------------------------------------------------------------------------------------------------
alpha_ = 0.25
mydist = (1 - alpha_) * hammingDistMatrix + alpha_* inducedDist_$ normalizedMyDist
hamming_clusters_alpha0.25 <- clusterize_ward_pam (
distMatrix = mydist,
maxnclust = 50 ,dFrameAsv = dframe2plus$ binnary_ASV_df$ ASV_name)
saveRDS (hamming_clusters_alpha0.25 ,file = paste0 (savingdir,'/' ,'hamming_clusters_2plus_alpha0.25' ))
## Alpha == 0.35 ---------------------------------------------------------------------------------------------------------------------
alpha_ = 0.35
mydist = (1 - alpha_) * hammingDistMatrix + alpha_* inducedDist_$ normalizedMyDist
hamming_clusters_alpha0.35 <- clusterize_ward_pam (
distMatrix = mydist,
maxnclust = 50 ,dFrameAsv = dframe2plus$ binnary_ASV_df$ ASV_name)
saveRDS (hamming_clusters_alpha0.35 ,file = paste0 (savingdir,'/' ,'hamming_clusters_2plus_alpha0.35' ))
}
Code
running = F
if (running){
inducedDist_ = inducedDist (
dFrame = dframe3plus$ dframe,
c1 = 1 ,c2= 1000 ,c3= 10 ,c4= 10 ,
compMatrix = dframe3plus$ ASV_composition %>% data.frame ())
hammingDistMatrix = hammingDist (dframe3plus$ binnary_ASV_df %>% select (- ASV_name) %>% as.matrix ())
hammingDistMatrix = normalizeDistMatrix (hammingDistMatrix)
## Varying Alpha
## Alpha == 0 ---------------------------------------------------------------------------------------------------------------------
hamming_clusters_alpha0 <- clusterize_ward_pam (
distMatrix = hammingDistMatrix,
maxnclust = 50 ,dFrameAsv = dframe3plus$ binnary_ASV_df$ ASV_name)
saveRDS (hamming_clusters_alpha0,file = paste0 (savingdir,'/' ,'hamming_clusters_3plus_alpha0' ))
## Varying Alpha
## Alpha == 0.1 ---------------------------------------------------------------------------------------------------------------------
alpha_ = 0.1
mydist = (1 - alpha_) * hammingDistMatrix + alpha_* inducedDist_$ normalizedMyDist
#running = F
hamming_clusters_alpha0.1 <- clusterize_ward_pam (
distMatrix = mydist,
maxnclust = 50 ,dFrameAsv = dframe3plus$ binnary_ASV_df$ ASV_name)
saveRDS (hamming_clusters_alpha0.1 ,file = paste0 (savingdir,'/' ,'hamming_clusters_3plus_alpha0.1' ))
## Alpha == 0.25 ---------------------------------------------------------------------------------------------------------------------
alpha_ = 0.25
mydist = (1 - alpha_) * hammingDistMatrix + alpha_* inducedDist_$ normalizedMyDist
#running = F
hamming_clusters_alpha0.25 <- clusterize_ward_pam (
distMatrix = mydist,
maxnclust = 50 ,dFrameAsv = dframe3plus$ binnary_ASV_df$ ASV_name)
saveRDS (hamming_clusters_alpha0.25 ,file = paste0 (savingdir,'/' ,'hamming_clusters_3plus_alpha0.25' ))
## Alpha == 0.35 ---------------------------------------------------------------------------------------------------------------------
alpha_ = 0.35
mydist = (1 - alpha_) * hammingDistMatrix + alpha_* inducedDist_$ normalizedMyDist
#running = F
hamming_clusters_alpha0.35 <- clusterize_ward_pam (
distMatrix = mydist,
maxnclust = 50 ,dFrameAsv = dframe3plus$ binnary_ASV_df$ ASV_name)
saveRDS (hamming_clusters_alpha0.35 ,file = paste0 (savingdir,'/' ,'hamming_clusters_3plus_alpha0.35' ))
}
Code
## Varying Alpha
## Alpha == 0 ---------------------------------------------------------------------------------------------------------------------
running = F
if (running){
hammingDistMatrix = hammingDist (dframe_allASVs$ binnary_ASV_df %>% select (- ASV_name) %>% as.matrix ())
hammingDistMatrix = normalizeDistMatrix (hammingDistMatrix)
inducedDist_ = inducedDist (
dFrame = dframe_allASVs$ dframe,
c1 = 1 ,c2= 1000 ,c3= 10 ,c4= 10 ,
compMatrix = dframe_allASVs$ ASV_composition %>% data.frame ())
hamming_clusters_alpha0 <- clusterize_ward_pam_weighted (
distMatrix = hammingDistMatrix,
maxnclust = 50 ,dFrameAsv = dframe_allASVs$ binnary_ASV_df$ ASV_name,weights_vec = df_weights$ Sum_RA)
saveRDS (hamming_clusters_alpha0,file = paste0 (savingdir,'/' ,'hamming_weighted_clusters_alpha0' ))
## Varying Alpha
## Alpha == 0.1 ---------------------------------------------------------------------------------------------------------------------
alpha_ = 0.1
mydist = (1 - alpha_) * hammingDistMatrix + alpha_* inducedDist_$ normalizedMyDist
hamming_clusters_alpha0.1 <- clusterize_ward_pam_weighted (
distMatrix = mydist,
maxnclust = 50 ,dFrameAsv = dframe_allASVs$ binnary_ASV_df$ ASV_name,weights_vec = df_weights$ Sum_RA)
saveRDS (hamming_clusters_alpha0.1 ,file = paste0 (savingdir,'/' ,'hamming_weighted_clusters_alpha0.1' ))
## Alpha == 0.25 ---------------------------------------------------------------------------------------------------------------------
alpha_ = 0.25
mydist = (1 - alpha_) * hammingDistMatrix + alpha_* inducedDist_$ normalizedMyDist
hamming_clusters_alpha0.25 <- clusterize_ward_pam_weighted (
distMatrix = mydist,
maxnclust = 50 ,dFrameAsv = dframe_allASVs$ binnary_ASV_df$ ASV_name,weights_vec = df_weights$ Sum_RA)
saveRDS (hamming_clusters_alpha0.25 ,file = paste0 (savingdir,'/' ,'hamming_weighted_clusters_alpha0.25' ))
## Alpha == 0.35 ---------------------------------------------------------------------------------------------------------------------
alpha_ = 0.35
mydist = (1 - alpha_) * hammingDistMatrix + alpha_* inducedDist_$ normalizedMyDist
hamming_clusters_alpha0.35 <- clusterize_ward_pam_weighted (
distMatrix = mydist,
maxnclust = 50 ,dFrameAsv = dframe_allASVs$ binnary_ASV_df$ ASV_name,weights_vec = df_weights$ Sum_RA)
saveRDS (hamming_clusters_alpha0.35 ,file = paste0 (savingdir,'/' ,'hamming_weighted_clusters_alpha0.35' ))
}
Code
running = F
if (running){
inducedDist_ = inducedDist (
dFrame = dframe2plus$ dframe,
c1 = 1 ,c2= 1000 ,c3= 10 ,c4= 10 ,
compMatrix = dframe2plus$ ASV_composition %>% data.frame ())
hammingDistMatrix = hammingDist (dframe2plus$ binnary_ASV_df %>% select (- ASV_name) %>% as.matrix ())
hammingDistMatrix = normalizeDistMatrix (hammingDistMatrix)
## Varying Alpha
## Alpha == 0 ---------------------------------------------------------------------------------------------------------------------
hamming_clusters_alpha0 <- clusterize_ward_pam_weighted (
distMatrix = hammingDistMatrix,
maxnclust = 50 ,dFrameAsv = dframe2plus$ binnary_ASV_df$ ASV_name,weights_vec = df_weights2plus$ Sum_RA)
saveRDS (hamming_clusters_alpha0,file = paste0 (savingdir,'/' ,'hamming_weighted_clusters_2plus_alpha0' ))
## Varying Alpha
## Alpha == 0.1 ---------------------------------------------------------------------------------------------------------------------
alpha_ = 0.1
mydist = (1 - alpha_) * hammingDistMatrix + alpha_* inducedDist_$ normalizedMyDist
hamming_clusters_alpha0.1 <- clusterize_ward_pam_weighted (
distMatrix = mydist,
maxnclust = 50 ,dFrameAsv = dframe2plus$ binnary_ASV_df$ ASV_name,weights_vec = df_weights2plus$ Sum_RA)
saveRDS (hamming_clusters_alpha0.1 ,file = paste0 (savingdir,'/' ,'hamming_weighted_clusters_2plus_alpha0.1' ))
## Alpha == 0.25 ---------------------------------------------------------------------------------------------------------------------
alpha_ = 0.25
mydist = (1 - alpha_) * hammingDistMatrix + alpha_* inducedDist_$ normalizedMyDist
hamming_clusters_alpha0.25 <- clusterize_ward_pam_weighted (
distMatrix = mydist,
maxnclust = 50 ,dFrameAsv = dframe2plus$ binnary_ASV_df$ ASV_name,weights_vec = df_weights2plus$ Sum_RA)
saveRDS (hamming_clusters_alpha0.25 ,file = paste0 (savingdir,'/' ,'hamming_weighted_clusters_2plus_alpha0.25' ))
## Alpha == 0.35 ---------------------------------------------------------------------------------------------------------------------
alpha_ = 0.35
mydist = (1 - alpha_) * hammingDistMatrix + alpha_* inducedDist_$ normalizedMyDist
hamming_clusters_alpha0.35 <- clusterize_ward_pam_weighted (
distMatrix = mydist,
maxnclust = 50 ,dFrameAsv = dframe2plus$ binnary_ASV_df$ ASV_name,weights_vec = df_weights2plus$ Sum_RA)
saveRDS (hamming_clusters_alpha0.35 ,file = paste0 (savingdir,'/' ,'hamming_weighted_clusters_2plus_alpha0.35' ))
}
Code
running = F
if (running){
inducedDist_ = inducedDist (
dFrame = dframe3plus$ dframe,
c1 = 1 ,c2= 1000 ,c3= 10 ,c4= 10 ,
compMatrix = dframe3plus$ ASV_composition %>% data.frame ())
hammingDistMatrix = hammingDist (dframe3plus$ binnary_ASV_df %>% select (- ASV_name) %>% as.matrix ())
hammingDistMatrix = normalizeDistMatrix (hammingDistMatrix)
## Varying Alpha
## Alpha == 0 ---------------------------------------------------------------------------------------------------------------------
hamming_clusters_alpha0 <- clusterize_ward_pam_weighted (
distMatrix = hammingDistMatrix,
maxnclust = 50 ,dFrameAsv = dframe3plus$ binnary_ASV_df$ ASV_name,weights_vec = df_weights3plus$ Sum_RA)
saveRDS (hamming_clusters_alpha0,file = paste0 (savingdir,'/' ,'hamming_weighted_clusters_3plus_alpha0' ))
## Varying Alpha
## Alpha == 0.1 ---------------------------------------------------------------------------------------------------------------------
alpha_ = 0.1
mydist = (1 - alpha_) * hammingDistMatrix + alpha_* inducedDist_$ normalizedMyDist
#running = F
hamming_clusters_alpha0.1 <- clusterize_ward_pam_weighted (
distMatrix = mydist,
maxnclust = 50 ,dFrameAsv = dframe3plus$ binnary_ASV_df$ ASV_name,weights_vec = df_weights3plus$ Sum_RA)
saveRDS (hamming_clusters_alpha0.1 ,file = paste0 (savingdir,'/' ,'hamming_weighted_clusters_3plus_alpha0.1' ))
## Alpha == 0.25 ---------------------------------------------------------------------------------------------------------------------
alpha_ = 0.25
mydist = (1 - alpha_) * hammingDistMatrix + alpha_* inducedDist_$ normalizedMyDist
#running = F
hamming_clusters_alpha0.25 <- clusterize_ward_pam_weighted (
distMatrix = mydist,
maxnclust = 50 ,dFrameAsv = dframe3plus$ binnary_ASV_df$ ASV_name,weights_vec = df_weights3plus$ Sum_RA)
saveRDS (hamming_clusters_alpha0.25 ,file = paste0 (savingdir,'/' ,'hamming_weighted_clusters_3plus_alpha0.25' ))
## Alpha == 0.35 ---------------------------------------------------------------------------------------------------------------------
alpha_ = 0.35
mydist = (1 - alpha_) * hammingDistMatrix + alpha_* inducedDist_$ normalizedMyDist
#running = F
hamming_clusters_alpha0.35 <- clusterize_ward_pam_weighted (
distMatrix = mydist,
maxnclust = 50 ,dFrameAsv = dframe3plus$ binnary_ASV_df$ ASV_name,weights_vec = df_weights3plus$ Sum_RA)
saveRDS (hamming_clusters_alpha0.35 ,file = paste0 (savingdir,'/' ,'hamming_weighted_clusters_3plus_alpha0.35' ))
}
Code
running = F
if (running){
hamming_clusters_alpha0 = readRDS (file = paste0 (savingdir,'/' ,'hamming_clusters_alpha0' ))
hamming_clusters_alpha0.1 = readRDS (file = paste0 (savingdir,'/' ,'hamming_clusters_alpha0.1' ))
hamming_clusters_alpha0.25 = readRDS (file = paste0 (savingdir,'/' ,'hamming_clusters_alpha0.25' ))
hamming_clusters_alpha0.35 = readRDS (file = paste0 (savingdir,'/' ,'hamming_clusters_alpha0.35' ))
hammingDistMatrix = hammingDist (dframe_allASVs$ binnary_ASV_df %>% select (- ASV_name) %>% as.matrix ())
hammingDistMatrix = normalizeDistMatrix (hammingDistMatrix)
#### ------ alpha =0
list_eval <- list ()
list_eval$ hclust<- list ()
list_eval$ pam <- list ()
numberOfClusters = ncol (hamming_clusters_alpha0$ dFrameAsv_hclust)- 1
cols_to_evaluate = colnames (hamming_clusters_alpha0$ dFrameAsv_hclust)
cols_to_evaluate = cols_to_evaluate[- 1 ]
for (i in 2 : length (cols_to_evaluate)){
list_eval$ hclust[[i-1 ]] = eval_clusters_distMatrix (
distMatrix = hammingDistMatrix,
df_ASV_Clusters = hamming_clusters_alpha0$ dFrameAsv_hclust %>% select (ASV= ASV,any_of (cols_to_evaluate[i]))
)
list_eval$ pam[[i-1 ]] = eval_clusters_distMatrix (
distMatrix = hammingDistMatrix,
df_ASV_Clusters = hamming_clusters_alpha0$ dFrameAsv_pam %>% select (ASV= ASV,any_of (cols_to_evaluate[i]))
)
}
saveRDS (list_eval,paste0 (savingdir,'/' ,'list_eval_alpha0' ))
rm (list_eval)
#### ------ alpha = 0.1
list_eval <- list ()
list_eval$ hclust<- list ()
list_eval$ pam <- list ()
numberOfClusters = ncol (hamming_clusters_alpha0.1 $ dFrameAsv_hclust)- 1
cols_to_evaluate = colnames (hamming_clusters_alpha0.1 $ dFrameAsv_hclust)
cols_to_evaluate = cols_to_evaluate[- 1 ]
for (i in 2 : length (cols_to_evaluate)){
list_eval$ hclust[[i-1 ]] = eval_clusters_distMatrix (
distMatrix = hammingDistMatrix,
df_ASV_Clusters = hamming_clusters_alpha0.1 $ dFrameAsv_hclust %>% select (ASV= ASV,any_of (cols_to_evaluate[i]))
)
list_eval$ pam[[i-1 ]] = eval_clusters_distMatrix (
distMatrix = hammingDistMatrix,
df_ASV_Clusters = hamming_clusters_alpha0.1 $ dFrameAsv_pam %>% select (ASV= ASV,any_of (cols_to_evaluate[i]))
)
}
saveRDS (list_eval,paste0 (savingdir,'/' ,'list_eval_alpha0.1' ))
rm (list_eval)
#### ------ alpha = 0.25
list_eval <- list ()
list_eval$ hclust<- list ()
list_eval$ pam <- list ()
numberOfClusters = ncol (hamming_clusters_alpha0.25 $ dFrameAsv_hclust)- 1
cols_to_evaluate = colnames (hamming_clusters_alpha0.25 $ dFrameAsv_hclust)
cols_to_evaluate = cols_to_evaluate[- 1 ]
for (i in 2 : length (cols_to_evaluate)){
list_eval$ hclust[[i-1 ]] = eval_clusters_distMatrix (
distMatrix = hammingDistMatrix,
df_ASV_Clusters = hamming_clusters_alpha0.25 $ dFrameAsv_hclust %>% select (ASV= ASV,any_of (cols_to_evaluate[i]))
)
list_eval$ pam[[i-1 ]] = eval_clusters_distMatrix (
distMatrix = hammingDistMatrix,
df_ASV_Clusters = hamming_clusters_alpha0.25 $ dFrameAsv_pam %>% select (ASV= ASV,any_of (cols_to_evaluate[i]))
)
}
saveRDS (list_eval,paste0 (savingdir,'/' ,'list_eval_alpha0.25' ))
rm (list_eval)
#### ------ alpha = 0.35
list_eval <- list ()
list_eval$ hclust<- list ()
list_eval$ pam <- list ()
numberOfClusters = ncol (hamming_clusters_alpha0.35 $ dFrameAsv_hclust)- 1
cols_to_evaluate = colnames (hamming_clusters_alpha0.35 $ dFrameAsv_hclust)
cols_to_evaluate = cols_to_evaluate[- 1 ]
for (i in 2 : length (cols_to_evaluate)){
list_eval$ hclust[[i-1 ]] = eval_clusters_distMatrix (
distMatrix = hammingDistMatrix,
df_ASV_Clusters = hamming_clusters_alpha0.35 $ dFrameAsv_hclust %>% select (ASV= ASV,any_of (cols_to_evaluate[i]))
)
list_eval$ pam[[i-1 ]] = eval_clusters_distMatrix (
distMatrix = hammingDistMatrix,
df_ASV_Clusters = hamming_clusters_alpha0.35 $ dFrameAsv_pam %>% select (ASV= ASV,any_of (cols_to_evaluate[i]))
)
}
saveRDS (list_eval,paste0 (savingdir,'/' ,'list_eval_alpha0.35' ))
}
Code
list_eval_alpha0 = readRDS (paste0 (savingdir,'/' ,'list_eval_alpha0' ))
list_eval_alpha0.1 = readRDS (paste0 (savingdir,'/' ,'list_eval_alpha0.1' ))
list_eval_alpha0.25 = readRDS (paste0 (savingdir,'/' ,'list_eval_alpha0.25' ))
list_eval_alpha0.35 = readRDS (paste0 (savingdir,'/' ,'list_eval_alpha0.35' ))
df_summary_eval = bind_rows (
summarise_eval_clusters_distMatrix (list_eval_alpha0) %>% mutate (alpha= 0 ),
summarise_eval_clusters_distMatrix (list_eval_alpha0.1 ) %>% mutate (alpha= 0.1 ),
summarise_eval_clusters_distMatrix (list_eval_alpha0.25 ) %>% mutate (alpha= 0.25 ),
summarise_eval_clusters_distMatrix (list_eval_alpha0.35 ) %>% mutate (alpha= 0.35 ))
df_summary_eval %>%
select (nclust,method,alpha,
within_between_avg,numer_within_between_avg,denom_within_between_avg) %>%
pivot_longer (cols = - c (1 ,2 ,3 )) %>% mutate (alpha = factor (alpha)) %>%
filter (nclust> 2 ) %>%
ggplot (aes (x= nclust,y= value,color= method,linetype= alpha))+
geom_line ()+
facet_wrap (~ name,scales = 'free' )+
theme_minimal ()+ theme (legend.position = 'bottom' )
Code
df_summary_eval %>%
select (nclust,method,alpha,everything ()) %>%
pivot_longer (cols = - c (1 ,2 ,3 )) %>% mutate (alpha = factor (alpha)) %>%
ggplot (aes (x= nclust,y= value,color= method,linetype= alpha))+
geom_line ()+
facet_wrap (~ name,scales = 'free' )+
theme_minimal ()+ theme (legend.position = 'bottom' )
Code
running = F
if (running){
hamming_clusters_alpha0 = readRDS (file = paste0 (savingdir,'/' ,'hamming_clusters_2plus_alpha0' ))
hamming_clusters_alpha0.1 = readRDS (file = paste0 (savingdir,'/' ,'hamming_clusters_2plus_alpha0.1' ))
hamming_clusters_alpha0.25 = readRDS (file = paste0 (savingdir,'/' ,'hamming_clusters_2plus_alpha0.25' ))
hamming_clusters_alpha0.35 = readRDS (file = paste0 (savingdir,'/' ,'hamming_clusters_2plus_alpha0.35' ))
hammingDistMatrix = hammingDist (dframe2plus$ binnary_ASV_df %>% select (- ASV_name) %>% as.matrix ())
hammingDistMatrix = normalizeDistMatrix (hammingDistMatrix)
#### ------ alpha =0
list_eval <- list ()
list_eval$ hclust<- list ()
list_eval$ pam <- list ()
numberOfClusters = ncol (hamming_clusters_alpha0$ dFrameAsv_hclust)- 1
cols_to_evaluate = colnames (hamming_clusters_alpha0$ dFrameAsv_hclust)
cols_to_evaluate = cols_to_evaluate[- 1 ]
for (i in 2 : length (cols_to_evaluate)){
list_eval$ hclust[[i-1 ]] = eval_clusters_distMatrix (
distMatrix = hammingDistMatrix,
df_ASV_Clusters = hamming_clusters_alpha0$ dFrameAsv_hclust %>% select (ASV= ASV,any_of (cols_to_evaluate[i]))
)
list_eval$ pam[[i-1 ]] = eval_clusters_distMatrix (
distMatrix = hammingDistMatrix,
df_ASV_Clusters = hamming_clusters_alpha0$ dFrameAsv_pam %>% select (ASV= ASV,any_of (cols_to_evaluate[i]))
)
}
saveRDS (list_eval,paste0 (savingdir,'/' ,'list_eval_2more_alpha0' ))
rm (list_eval)
#### ------ alpha = 0.1 -----------------------------------------------------------------
list_eval <- list ()
list_eval$ hclust<- list ()
list_eval$ pam <- list ()
numberOfClusters = ncol (hamming_clusters_alpha0.1 $ dFrameAsv_hclust)- 1
cols_to_evaluate = colnames (hamming_clusters_alpha0.1 $ dFrameAsv_hclust)
cols_to_evaluate = cols_to_evaluate[- 1 ]
for (i in 2 : length (cols_to_evaluate)){
list_eval$ hclust[[i-1 ]] = eval_clusters_distMatrix (
distMatrix = hammingDistMatrix,
df_ASV_Clusters = hamming_clusters_alpha0.1 $ dFrameAsv_hclust %>% select (ASV= ASV,any_of (cols_to_evaluate[i]))
)
list_eval$ pam[[i-1 ]] = eval_clusters_distMatrix (
distMatrix = hammingDistMatrix,
df_ASV_Clusters = hamming_clusters_alpha0.1 $ dFrameAsv_pam %>% select (ASV= ASV,any_of (cols_to_evaluate[i]))
)
}
saveRDS (list_eval,paste0 (savingdir,'/' ,'list_eval_2more_alpha0.1' ))
rm (list_eval)
#### ------ alpha = 0.25 -----------------------------------------------------------------
list_eval <- list ()
list_eval$ hclust<- list ()
list_eval$ pam <- list ()
numberOfClusters = ncol (hamming_clusters_alpha0.25 $ dFrameAsv_hclust)- 1
cols_to_evaluate = colnames (hamming_clusters_alpha0.25 $ dFrameAsv_hclust)
cols_to_evaluate = cols_to_evaluate[- 1 ]
for (i in 2 : length (cols_to_evaluate)){
list_eval$ hclust[[i-1 ]] = eval_clusters_distMatrix (
distMatrix = hammingDistMatrix,
df_ASV_Clusters = hamming_clusters_alpha0.25 $ dFrameAsv_hclust %>% select (ASV= ASV,any_of (cols_to_evaluate[i]))
)
list_eval$ pam[[i-1 ]] = eval_clusters_distMatrix (
distMatrix = hammingDistMatrix,
df_ASV_Clusters = hamming_clusters_alpha0.25 $ dFrameAsv_pam %>% select (ASV= ASV,any_of (cols_to_evaluate[i]))
)
}
saveRDS (list_eval,paste0 (savingdir,'/' ,'list_eval_2more_alpha0.25' ))
rm (list_eval)
#### ------ alpha = 0.35 -----------------------------------------------------------------
list_eval <- list ()
list_eval$ hclust<- list ()
list_eval$ pam <- list ()
numberOfClusters = ncol (hamming_clusters_alpha0.35 $ dFrameAsv_hclust)- 1
cols_to_evaluate = colnames (hamming_clusters_alpha0.35 $ dFrameAsv_hclust)
cols_to_evaluate = cols_to_evaluate[- 1 ]
for (i in 2 : length (cols_to_evaluate)){
list_eval$ hclust[[i-1 ]] = eval_clusters_distMatrix (
distMatrix = hammingDistMatrix,
df_ASV_Clusters = hamming_clusters_alpha0.35 $ dFrameAsv_hclust %>% select (ASV= ASV,any_of (cols_to_evaluate[i]))
)
list_eval$ pam[[i-1 ]] = eval_clusters_distMatrix (
distMatrix = hammingDistMatrix,
df_ASV_Clusters = hamming_clusters_alpha0.35 $ dFrameAsv_pam %>% select (ASV= ASV,any_of (cols_to_evaluate[i]))
)
}
saveRDS (list_eval,paste0 (savingdir,'/' ,'list_eval_2more_alpha0.35' ))
}
Code
list_eval_alpha0 = readRDS (paste0 (savingdir,'/' ,'list_eval_2more_alpha0' ))
list_eval_alpha0.1 = readRDS (paste0 (savingdir,'/' ,'list_eval_2more_alpha0.1' ))
list_eval_alpha0.25 = readRDS (paste0 (savingdir,'/' ,'list_eval_2more_alpha0.25' ))
list_eval_alpha0.35 = readRDS (paste0 (savingdir,'/' ,'list_eval_2more_alpha0.35' ))
df_summary_eval = bind_rows (
summarise_eval_clusters_distMatrix (list_eval_alpha0) %>% mutate (alpha= 0 ),
summarise_eval_clusters_distMatrix (list_eval_alpha0.1 ) %>% mutate (alpha= 0.1 ),
summarise_eval_clusters_distMatrix (list_eval_alpha0.25 ) %>% mutate (alpha= 0.25 ),
summarise_eval_clusters_distMatrix (list_eval_alpha0.35 ) %>% mutate (alpha= 0.35 ))
df_summary_eval %>%
select (nclust,method,alpha,
within_between_avg,numer_within_between_avg,denom_within_between_avg) %>%
pivot_longer (cols = - c (1 ,2 ,3 )) %>% mutate (alpha = factor (alpha)) %>%
filter (nclust> 2 ) %>%
ggplot (aes (x= nclust,y= value,color= method,linetype= alpha))+
geom_line ()+
facet_wrap (~ name,scales = 'free' )+
theme_minimal ()+ theme (legend.position = 'bottom' )
Code
df_summary_eval %>%
select (nclust,method,alpha,everything ()) %>%
pivot_longer (cols = - c (1 ,2 ,3 )) %>% mutate (alpha = factor (alpha)) %>%
ggplot (aes (x= nclust,y= value,color= method,linetype= alpha))+
geom_line ()+
facet_wrap (~ name,scales = 'free' )+
theme_minimal ()+ theme (legend.position = 'bottom' )
Code
running = F
if (running){
hamming_clusters_alpha0 = readRDS (file = paste0 (savingdir,'/' ,'hamming_clusters_3plus_alpha0' ))
hamming_clusters_alpha0.1 = readRDS (file = paste0 (savingdir,'/' ,'hamming_clusters_3plus_alpha0.1' ))
hamming_clusters_alpha0.25 = readRDS (file = paste0 (savingdir,'/' ,'hamming_clusters_3plus_alpha0.25' ))
hamming_clusters_alpha0.35 = readRDS (file = paste0 (savingdir,'/' ,'hamming_clusters_3plus_alpha0.35' ))
hammingDistMatrix = hammingDist (dframe3plus$ binnary_ASV_df %>% select (- ASV_name) %>% as.matrix ())
hammingDistMatrix = normalizeDistMatrix (hammingDistMatrix)
#### ------ alpha =0
list_eval <- list ()
list_eval$ hclust<- list ()
list_eval$ pam <- list ()
numberOfClusters = ncol (hamming_clusters_alpha0$ dFrameAsv_hclust)- 1
cols_to_evaluate = colnames (hamming_clusters_alpha0$ dFrameAsv_hclust)
cols_to_evaluate = cols_to_evaluate[- 1 ]
for (i in 2 : length (cols_to_evaluate)){
list_eval$ hclust[[i-1 ]] = eval_clusters_distMatrix (
distMatrix = hammingDistMatrix,
df_ASV_Clusters = hamming_clusters_alpha0$ dFrameAsv_hclust %>% select (ASV= ASV,any_of (cols_to_evaluate[i]))
)
list_eval$ pam[[i-1 ]] = eval_clusters_distMatrix (
distMatrix = hammingDistMatrix,
df_ASV_Clusters = hamming_clusters_alpha0$ dFrameAsv_pam %>% select (ASV= ASV,any_of (cols_to_evaluate[i]))
)
}
saveRDS (list_eval,paste0 (savingdir,'/' ,'list_eval_3more_alpha0' ))
rm (list_eval)
#### ------ alpha = 0.1 -----------------------------------------------------------------
list_eval <- list ()
list_eval$ hclust<- list ()
list_eval$ pam <- list ()
numberOfClusters = ncol (hamming_clusters_alpha0.1 $ dFrameAsv_hclust)- 1
cols_to_evaluate = colnames (hamming_clusters_alpha0.1 $ dFrameAsv_hclust)
cols_to_evaluate = cols_to_evaluate[- 1 ]
for (i in 2 : length (cols_to_evaluate)){
list_eval$ hclust[[i-1 ]] = eval_clusters_distMatrix (
distMatrix = hammingDistMatrix,
df_ASV_Clusters = hamming_clusters_alpha0.1 $ dFrameAsv_hclust %>% select (ASV= ASV,any_of (cols_to_evaluate[i]))
)
list_eval$ pam[[i-1 ]] = eval_clusters_distMatrix (
distMatrix = hammingDistMatrix,
df_ASV_Clusters = hamming_clusters_alpha0.1 $ dFrameAsv_pam %>% select (ASV= ASV,any_of (cols_to_evaluate[i]))
)
}
saveRDS (list_eval,paste0 (savingdir,'/' ,'list_eval_3more_alpha0.1' ))
rm (list_eval)
#### ------ alpha = 0.25 -----------------------------------------------------------------
list_eval <- list ()
list_eval$ hclust<- list ()
list_eval$ pam <- list ()
numberOfClusters = ncol (hamming_clusters_alpha0.25 $ dFrameAsv_hclust)- 1
cols_to_evaluate = colnames (hamming_clusters_alpha0.25 $ dFrameAsv_hclust)
cols_to_evaluate = cols_to_evaluate[- 1 ]
for (i in 2 : length (cols_to_evaluate)){
list_eval$ hclust[[i-1 ]] = eval_clusters_distMatrix (
distMatrix = hammingDistMatrix,
df_ASV_Clusters = hamming_clusters_alpha0.25 $ dFrameAsv_hclust %>% select (ASV= ASV,any_of (cols_to_evaluate[i]))
)
list_eval$ pam[[i-1 ]] = eval_clusters_distMatrix (
distMatrix = hammingDistMatrix,
df_ASV_Clusters = hamming_clusters_alpha0.25 $ dFrameAsv_pam %>% select (ASV= ASV,any_of (cols_to_evaluate[i]))
)
}
saveRDS (list_eval,paste0 (savingdir,'/' ,'list_eval_3more_alpha0.25' ))
rm (list_eval)
#### ------ alpha = 0.35 -----------------------------------------------------------------
list_eval <- list ()
list_eval$ hclust<- list ()
list_eval$ pam <- list ()
numberOfClusters = ncol (hamming_clusters_alpha0.35 $ dFrameAsv_hclust)- 1
cols_to_evaluate = colnames (hamming_clusters_alpha0.35 $ dFrameAsv_hclust)
cols_to_evaluate = cols_to_evaluate[- 1 ]
for (i in 2 : length (cols_to_evaluate)){
list_eval$ hclust[[i-1 ]] = eval_clusters_distMatrix (
distMatrix = hammingDistMatrix,
df_ASV_Clusters = hamming_clusters_alpha0.35 $ dFrameAsv_hclust %>% select (ASV= ASV,any_of (cols_to_evaluate[i]))
)
list_eval$ pam[[i-1 ]] = eval_clusters_distMatrix (
distMatrix = hammingDistMatrix,
df_ASV_Clusters = hamming_clusters_alpha0.35 $ dFrameAsv_pam %>% select (ASV= ASV,any_of (cols_to_evaluate[i]))
)
}
saveRDS (list_eval,paste0 (savingdir,'/' ,'list_eval_3more_alpha0.35' ))
}
Code
list_eval_alpha0 = readRDS (paste0 (savingdir,'/' ,'list_eval_3more_alpha0' ))
list_eval_alpha0.1 = readRDS (paste0 (savingdir,'/' ,'list_eval_3more_alpha0.1' ))
list_eval_alpha0.25 = readRDS (paste0 (savingdir,'/' ,'list_eval_3more_alpha0.25' ))
list_eval_alpha0.35 = readRDS (paste0 (savingdir,'/' ,'list_eval_3more_alpha0.35' ))
df_summary_eval = bind_rows (
summarise_eval_clusters_distMatrix (list_eval_alpha0) %>% mutate (alpha= 0 ),
summarise_eval_clusters_distMatrix (list_eval_alpha0.1 ) %>% mutate (alpha= 0.1 ),
summarise_eval_clusters_distMatrix (list_eval_alpha0.25 ) %>% mutate (alpha= 0.25 ),
summarise_eval_clusters_distMatrix (list_eval_alpha0.35 ) %>% mutate (alpha= 0.35 ))
df_summary_eval %>%
select (nclust,method,alpha,
within_between_avg,numer_within_between_avg,denom_within_between_avg) %>%
pivot_longer (cols = - c (1 ,2 ,3 )) %>% mutate (alpha = factor (alpha)) %>%
filter (nclust> 2 ) %>%
ggplot (aes (x= nclust,y= value,color= method,linetype= alpha))+
geom_line ()+
facet_wrap (~ name,scales = 'free' )+
theme_minimal ()+ theme (legend.position = 'bottom' )
Code
df_summary_eval %>%
select (nclust,method,alpha,everything ()) %>%
pivot_longer (cols = - c (1 ,2 ,3 )) %>% mutate (alpha = factor (alpha)) %>%
ggplot (aes (x= nclust,y= value,color= method,linetype= alpha))+
geom_line ()+
facet_wrap (~ name,scales = 'free' )+
theme_minimal ()+ theme (legend.position = 'bottom' )
Code
running = F
if (running){
hamming_clusters_alpha0 = readRDS (file = paste0 (savingdir,'/' ,'hamming_weighted_clusters_alpha0' ))
hamming_clusters_alpha0.1 = readRDS (file = paste0 (savingdir,'/' ,'hamming_weighted_clusters_alpha0.1' ))
hamming_clusters_alpha0.25 = readRDS (file = paste0 (savingdir,'/' ,'hamming_weighted_clusters_alpha0.25' ))
hamming_clusters_alpha0.35 = readRDS (file = paste0 (savingdir,'/' ,'hamming_weighted_clusters_alpha0.35' ))
hammingDistMatrix = hammingDist (dframe_allASVs$ binnary_ASV_df %>% select (- ASV_name) %>% as.matrix ())
hammingDistMatrix = normalizeDistMatrix (hammingDistMatrix)
#### ------ alpha =0
list_eval <- list ()
list_eval$ hclust<- list ()
list_eval$ pam <- list ()
numberOfClusters = ncol (hamming_clusters_alpha0$ dFrameAsv_hclust)- 1
cols_to_evaluate = colnames (hamming_clusters_alpha0$ dFrameAsv_hclust)
cols_to_evaluate = cols_to_evaluate[- 1 ]
for (i in 2 : length (cols_to_evaluate)){
list_eval$ hclust[[i-1 ]] = eval_clusters_distMatrix (
distMatrix = hammingDistMatrix,
df_ASV_Clusters = hamming_clusters_alpha0$ dFrameAsv_hclust %>% select (ASV= ASV,any_of (cols_to_evaluate[i]))
)
list_eval$ pam[[i-1 ]] = eval_clusters_distMatrix (
distMatrix = hammingDistMatrix,
df_ASV_Clusters = hamming_clusters_alpha0$ dFrameAsv_pam %>% select (ASV= ASV,any_of (cols_to_evaluate[i]))
)
}
saveRDS (list_eval,paste0 (savingdir,'/' ,'list_eval_weighted_alpha0' ))
rm (list_eval)
#### ------ alpha = 0.1
list_eval <- list ()
list_eval$ hclust<- list ()
list_eval$ pam <- list ()
numberOfClusters = ncol (hamming_clusters_alpha0.1 $ dFrameAsv_hclust)- 1
cols_to_evaluate = colnames (hamming_clusters_alpha0.1 $ dFrameAsv_hclust)
cols_to_evaluate = cols_to_evaluate[- 1 ]
for (i in 2 : length (cols_to_evaluate)){
list_eval$ hclust[[i-1 ]] = eval_clusters_distMatrix (
distMatrix = hammingDistMatrix,
df_ASV_Clusters = hamming_clusters_alpha0.1 $ dFrameAsv_hclust %>% select (ASV= ASV,any_of (cols_to_evaluate[i]))
)
list_eval$ pam[[i-1 ]] = eval_clusters_distMatrix (
distMatrix = hammingDistMatrix,
df_ASV_Clusters = hamming_clusters_alpha0.1 $ dFrameAsv_pam %>% select (ASV= ASV,any_of (cols_to_evaluate[i]))
)
}
saveRDS (list_eval,paste0 (savingdir,'/' ,'list_eval_weighted_alpha0.1' ))
rm (list_eval)
#### ------ alpha = 0.25
list_eval <- list ()
list_eval$ hclust<- list ()
list_eval$ pam <- list ()
numberOfClusters = ncol (hamming_clusters_alpha0.25 $ dFrameAsv_hclust)- 1
cols_to_evaluate = colnames (hamming_clusters_alpha0.25 $ dFrameAsv_hclust)
cols_to_evaluate = cols_to_evaluate[- 1 ]
for (i in 2 : length (cols_to_evaluate)){
list_eval$ hclust[[i-1 ]] = eval_clusters_distMatrix (
distMatrix = hammingDistMatrix,
df_ASV_Clusters = hamming_clusters_alpha0.25 $ dFrameAsv_hclust %>% select (ASV= ASV,any_of (cols_to_evaluate[i]))
)
list_eval$ pam[[i-1 ]] = eval_clusters_distMatrix (
distMatrix = hammingDistMatrix,
df_ASV_Clusters = hamming_clusters_alpha0.25 $ dFrameAsv_pam %>% select (ASV= ASV,any_of (cols_to_evaluate[i]))
)
}
saveRDS (list_eval,paste0 (savingdir,'/' ,'list_eval_weighted_alpha0.25' ))
rm (list_eval)
#### ------ alpha = 0.35
list_eval <- list ()
list_eval$ hclust<- list ()
list_eval$ pam <- list ()
numberOfClusters = ncol (hamming_clusters_alpha0.35 $ dFrameAsv_hclust)- 1
cols_to_evaluate = colnames (hamming_clusters_alpha0.35 $ dFrameAsv_hclust)
cols_to_evaluate = cols_to_evaluate[- 1 ]
for (i in 2 : length (cols_to_evaluate)){
list_eval$ hclust[[i-1 ]] = eval_clusters_distMatrix (
distMatrix = hammingDistMatrix,
df_ASV_Clusters = hamming_clusters_alpha0.35 $ dFrameAsv_hclust %>% select (ASV= ASV,any_of (cols_to_evaluate[i]))
)
list_eval$ pam[[i-1 ]] = eval_clusters_distMatrix (
distMatrix = hammingDistMatrix,
df_ASV_Clusters = hamming_clusters_alpha0.35 $ dFrameAsv_pam %>% select (ASV= ASV,any_of (cols_to_evaluate[i]))
)
}
saveRDS (list_eval,paste0 (savingdir,'/' ,'list_eval_weighted_alpha0.35' ))
}
Code
list_eval_alpha0 = readRDS (paste0 (savingdir,'/' ,'list_eval_weighted_alpha0' ))
list_eval_alpha0.1 = readRDS (paste0 (savingdir,'/' ,'list_eval_weighted_alpha0.1' ))
list_eval_alpha0.25 = readRDS (paste0 (savingdir,'/' ,'list_eval_weighted_alpha0.25' ))
list_eval_alpha0.35 = readRDS (paste0 (savingdir,'/' ,'list_eval_weighted_alpha0.35' ))
df_summary_eval = bind_rows (
summarise_eval_clusters_distMatrix (list_eval_alpha0) %>% mutate (alpha= 0 ),
summarise_eval_clusters_distMatrix (list_eval_alpha0.1 ) %>% mutate (alpha= 0.1 ),
summarise_eval_clusters_distMatrix (list_eval_alpha0.25 ) %>% mutate (alpha= 0.25 ),
summarise_eval_clusters_distMatrix (list_eval_alpha0.35 ) %>% mutate (alpha= 0.35 ))
df_summary_eval %>%
select (nclust,method,alpha,
within_between_avg,numer_within_between_avg,denom_within_between_avg) %>%
pivot_longer (cols = - c (1 ,2 ,3 )) %>% mutate (alpha = factor (alpha)) %>%
filter (nclust> 2 ) %>%
ggplot (aes (x= nclust,y= value,color= method,linetype= alpha))+
geom_line ()+
facet_wrap (~ name,scales = 'free' )+
theme_minimal ()+ theme (legend.position = 'bottom' )
Code
df_summary_eval %>%
select (nclust,method,alpha,everything ()) %>%
pivot_longer (cols = - c (1 ,2 ,3 )) %>% mutate (alpha = factor (alpha)) %>%
ggplot (aes (x= nclust,y= value,color= method,linetype= alpha))+
geom_line ()+
facet_wrap (~ name,scales = 'free' )+
theme_minimal ()+ theme (legend.position = 'bottom' )
Code
running = F
if (running){
hamming_clusters_alpha0 = readRDS (file = paste0 (savingdir,'/' ,'hamming_weighted_clusters_2plus_alpha0' ))
hamming_clusters_alpha0.1 = readRDS (file = paste0 (savingdir,'/' ,'hamming_weighted_clusters_2plus_alpha0.1' ))
hamming_clusters_alpha0.25 = readRDS (file = paste0 (savingdir,'/' ,'hamming_weighted_clusters_2plus_alpha0.25' ))
hamming_clusters_alpha0.35 = readRDS (file = paste0 (savingdir,'/' ,'hamming_weighted_clusters_2plus_alpha0.35' ))
hammingDistMatrix = hammingDist (dframe2plus$ binnary_ASV_df %>% select (- ASV_name) %>% as.matrix ())
hammingDistMatrix = normalizeDistMatrix (hammingDistMatrix)
#### ------ alpha =0
list_eval <- list ()
list_eval$ hclust<- list ()
list_eval$ pam <- list ()
numberOfClusters = ncol (hamming_clusters_alpha0$ dFrameAsv_hclust)- 1
cols_to_evaluate = colnames (hamming_clusters_alpha0$ dFrameAsv_hclust)
cols_to_evaluate = cols_to_evaluate[- 1 ]
for (i in 2 : length (cols_to_evaluate)){
list_eval$ hclust[[i-1 ]] = eval_clusters_distMatrix (
distMatrix = hammingDistMatrix,
df_ASV_Clusters = hamming_clusters_alpha0$ dFrameAsv_hclust %>% select (ASV= ASV,any_of (cols_to_evaluate[i]))
)
list_eval$ pam[[i-1 ]] = eval_clusters_distMatrix (
distMatrix = hammingDistMatrix,
df_ASV_Clusters = hamming_clusters_alpha0$ dFrameAsv_pam %>% select (ASV= ASV,any_of (cols_to_evaluate[i]))
)
}
saveRDS (list_eval,paste0 (savingdir,'/' ,'list_eval_weighted_2more_alpha0' ))
rm (list_eval)
#### ------ alpha = 0.1 -----------------------------------------------------------------
list_eval <- list ()
list_eval$ hclust<- list ()
list_eval$ pam <- list ()
numberOfClusters = ncol (hamming_clusters_alpha0.1 $ dFrameAsv_hclust)- 1
cols_to_evaluate = colnames (hamming_clusters_alpha0.1 $ dFrameAsv_hclust)
cols_to_evaluate = cols_to_evaluate[- 1 ]
for (i in 2 : length (cols_to_evaluate)){
list_eval$ hclust[[i-1 ]] = eval_clusters_distMatrix (
distMatrix = hammingDistMatrix,
df_ASV_Clusters = hamming_clusters_alpha0.1 $ dFrameAsv_hclust %>% select (ASV= ASV,any_of (cols_to_evaluate[i]))
)
list_eval$ pam[[i-1 ]] = eval_clusters_distMatrix (
distMatrix = hammingDistMatrix,
df_ASV_Clusters = hamming_clusters_alpha0.1 $ dFrameAsv_pam %>% select (ASV= ASV,any_of (cols_to_evaluate[i]))
)
}
saveRDS (list_eval,paste0 (savingdir,'/' ,'list_eval_weighted_2more_alpha0.1' ))
rm (list_eval)
#### ------ alpha = 0.25 -----------------------------------------------------------------
list_eval <- list ()
list_eval$ hclust<- list ()
list_eval$ pam <- list ()
numberOfClusters = ncol (hamming_clusters_alpha0.25 $ dFrameAsv_hclust)- 1
cols_to_evaluate = colnames (hamming_clusters_alpha0.25 $ dFrameAsv_hclust)
cols_to_evaluate = cols_to_evaluate[- 1 ]
for (i in 2 : length (cols_to_evaluate)){
list_eval$ hclust[[i-1 ]] = eval_clusters_distMatrix (
distMatrix = hammingDistMatrix,
df_ASV_Clusters = hamming_clusters_alpha0.25 $ dFrameAsv_hclust %>% select (ASV= ASV,any_of (cols_to_evaluate[i]))
)
list_eval$ pam[[i-1 ]] = eval_clusters_distMatrix (
distMatrix = hammingDistMatrix,
df_ASV_Clusters = hamming_clusters_alpha0.25 $ dFrameAsv_pam %>% select (ASV= ASV,any_of (cols_to_evaluate[i]))
)
}
saveRDS (list_eval,paste0 (savingdir,'/' ,'list_eval_weighted_2more_alpha0.25' ))
rm (list_eval)
#### ------ alpha = 0.35 -----------------------------------------------------------------
list_eval <- list ()
list_eval$ hclust<- list ()
list_eval$ pam <- list ()
numberOfClusters = ncol (hamming_clusters_alpha0.35 $ dFrameAsv_hclust)- 1
cols_to_evaluate = colnames (hamming_clusters_alpha0.35 $ dFrameAsv_hclust)
cols_to_evaluate = cols_to_evaluate[- 1 ]
for (i in 2 : length (cols_to_evaluate)){
list_eval$ hclust[[i-1 ]] = eval_clusters_distMatrix (
distMatrix = hammingDistMatrix,
df_ASV_Clusters = hamming_clusters_alpha0.35 $ dFrameAsv_hclust %>% select (ASV= ASV,any_of (cols_to_evaluate[i]))
)
list_eval$ pam[[i-1 ]] = eval_clusters_distMatrix (
distMatrix = hammingDistMatrix,
df_ASV_Clusters = hamming_clusters_alpha0.35 $ dFrameAsv_pam %>% select (ASV= ASV,any_of (cols_to_evaluate[i]))
)
}
saveRDS (list_eval,paste0 (savingdir,'/' ,'list_eval_weighted_2more_alpha0.35' ))
}
Code
list_eval_alpha0 = readRDS (paste0 (savingdir,'/' ,'list_eval_weighted_2more_alpha0' ))
list_eval_alpha0.1 = readRDS (paste0 (savingdir,'/' ,'list_eval_weighted_2more_alpha0.1' ))
list_eval_alpha0.25 = readRDS (paste0 (savingdir,'/' ,'list_eval_weighted_2more_alpha0.25' ))
list_eval_alpha0.35 = readRDS (paste0 (savingdir,'/' ,'list_eval_weighted_2more_alpha0.35' ))
df_summary_eval = bind_rows (
summarise_eval_clusters_distMatrix (list_eval_alpha0) %>% mutate (alpha= 0 ),
summarise_eval_clusters_distMatrix (list_eval_alpha0.1 ) %>% mutate (alpha= 0.1 ),
summarise_eval_clusters_distMatrix (list_eval_alpha0.25 ) %>% mutate (alpha= 0.25 ),
summarise_eval_clusters_distMatrix (list_eval_alpha0.35 ) %>% mutate (alpha= 0.35 ))
df_summary_eval %>%
select (nclust,method,alpha,
within_between_avg,numer_within_between_avg,denom_within_between_avg) %>%
pivot_longer (cols = - c (1 ,2 ,3 )) %>% mutate (alpha = factor (alpha)) %>%
filter (nclust> 2 ) %>%
ggplot (aes (x= nclust,y= value,color= method,linetype= alpha))+
geom_line ()+
facet_wrap (~ name,scales = 'free' )+
theme_minimal ()+ theme (legend.position = 'bottom' )
Code
df_summary_eval %>%
select (nclust,method,alpha,everything ()) %>%
pivot_longer (cols = - c (1 ,2 ,3 )) %>% mutate (alpha = factor (alpha)) %>%
ggplot (aes (x= nclust,y= value,color= method,linetype= alpha))+
geom_line ()+
facet_wrap (~ name,scales = 'free' )+
theme_minimal ()+ theme (legend.position = 'bottom' )
Code
running = F
if (running){
hamming_clusters_alpha0 = readRDS (file = paste0 (savingdir,'/' ,'hamming_weighted_clusters_3plus_alpha0' ))
hamming_clusters_alpha0.1 = readRDS (file = paste0 (savingdir,'/' ,'hamming_weighted_clusters_3plus_alpha0.1' ))
hamming_clusters_alpha0.25 = readRDS (file = paste0 (savingdir,'/' ,'hamming_weighted_clusters_3plus_alpha0.25' ))
hamming_clusters_alpha0.35 = readRDS (file = paste0 (savingdir,'/' ,'hamming_weighted_clusters_3plus_alpha0.35' ))
hammingDistMatrix = hammingDist (dframe3plus$ binnary_ASV_df %>% select (- ASV_name) %>% as.matrix ())
hammingDistMatrix = normalizeDistMatrix (hammingDistMatrix)
#### ------ alpha =0
list_eval <- list ()
list_eval$ hclust<- list ()
list_eval$ pam <- list ()
numberOfClusters = ncol (hamming_clusters_alpha0$ dFrameAsv_hclust)- 1
cols_to_evaluate = colnames (hamming_clusters_alpha0$ dFrameAsv_hclust)
cols_to_evaluate = cols_to_evaluate[- 1 ]
for (i in 2 : length (cols_to_evaluate)){
list_eval$ hclust[[i-1 ]] = eval_clusters_distMatrix (
distMatrix = hammingDistMatrix,
df_ASV_Clusters = hamming_clusters_alpha0$ dFrameAsv_hclust %>% select (ASV= ASV,any_of (cols_to_evaluate[i]))
)
list_eval$ pam[[i-1 ]] = eval_clusters_distMatrix (
distMatrix = hammingDistMatrix,
df_ASV_Clusters = hamming_clusters_alpha0$ dFrameAsv_pam %>% select (ASV= ASV,any_of (cols_to_evaluate[i]))
)
}
saveRDS (list_eval,paste0 (savingdir,'/' ,'list_eval_weighted_3more_alpha0' ))
rm (list_eval)
#### ------ alpha = 0.1 -----------------------------------------------------------------
list_eval <- list ()
list_eval$ hclust<- list ()
list_eval$ pam <- list ()
numberOfClusters = ncol (hamming_clusters_alpha0.1 $ dFrameAsv_hclust)- 1
cols_to_evaluate = colnames (hamming_clusters_alpha0.1 $ dFrameAsv_hclust)
cols_to_evaluate = cols_to_evaluate[- 1 ]
for (i in 2 : length (cols_to_evaluate)){
list_eval$ hclust[[i-1 ]] = eval_clusters_distMatrix (
distMatrix = hammingDistMatrix,
df_ASV_Clusters = hamming_clusters_alpha0.1 $ dFrameAsv_hclust %>% select (ASV= ASV,any_of (cols_to_evaluate[i]))
)
list_eval$ pam[[i-1 ]] = eval_clusters_distMatrix (
distMatrix = hammingDistMatrix,
df_ASV_Clusters = hamming_clusters_alpha0.1 $ dFrameAsv_pam %>% select (ASV= ASV,any_of (cols_to_evaluate[i]))
)
}
saveRDS (list_eval,paste0 (savingdir,'/' ,'list_eval_weighted_3more_alpha0.1' ))
rm (list_eval)
#### ------ alpha = 0.25 -----------------------------------------------------------------
list_eval <- list ()
list_eval$ hclust<- list ()
list_eval$ pam <- list ()
numberOfClusters = ncol (hamming_clusters_alpha0.25 $ dFrameAsv_hclust)- 1
cols_to_evaluate = colnames (hamming_clusters_alpha0.25 $ dFrameAsv_hclust)
cols_to_evaluate = cols_to_evaluate[- 1 ]
for (i in 2 : length (cols_to_evaluate)){
list_eval$ hclust[[i-1 ]] = eval_clusters_distMatrix (
distMatrix = hammingDistMatrix,
df_ASV_Clusters = hamming_clusters_alpha0.25 $ dFrameAsv_hclust %>% select (ASV= ASV,any_of (cols_to_evaluate[i]))
)
list_eval$ pam[[i-1 ]] = eval_clusters_distMatrix (
distMatrix = hammingDistMatrix,
df_ASV_Clusters = hamming_clusters_alpha0.25 $ dFrameAsv_pam %>% select (ASV= ASV,any_of (cols_to_evaluate[i]))
)
}
saveRDS (list_eval,paste0 (savingdir,'/' ,'list_eval_weighted_3more_alpha0.25' ))
rm (list_eval)
#### ------ alpha = 0.35 -----------------------------------------------------------------
list_eval <- list ()
list_eval$ hclust<- list ()
list_eval$ pam <- list ()
numberOfClusters = ncol (hamming_clusters_alpha0.35 $ dFrameAsv_hclust)- 1
cols_to_evaluate = colnames (hamming_clusters_alpha0.35 $ dFrameAsv_hclust)
cols_to_evaluate = cols_to_evaluate[- 1 ]
for (i in 2 : length (cols_to_evaluate)){
list_eval$ hclust[[i-1 ]] = eval_clusters_distMatrix (
distMatrix = hammingDistMatrix,
df_ASV_Clusters = hamming_clusters_alpha0.35 $ dFrameAsv_hclust %>% select (ASV= ASV,any_of (cols_to_evaluate[i]))
)
list_eval$ pam[[i-1 ]] = eval_clusters_distMatrix (
distMatrix = hammingDistMatrix,
df_ASV_Clusters = hamming_clusters_alpha0.35 $ dFrameAsv_pam %>% select (ASV= ASV,any_of (cols_to_evaluate[i]))
)
}
saveRDS (list_eval,paste0 (savingdir,'/' ,'list_eval_weighted_3more_alpha0.35' ))
}
Code
list_eval_alpha0 = readRDS (paste0 (savingdir,'/' ,'list_eval_weighted_3more_alpha0' ))
list_eval_alpha0.1 = readRDS (paste0 (savingdir,'/' ,'list_eval_weighted_3more_alpha0.1' ))
list_eval_alpha0.25 = readRDS (paste0 (savingdir,'/' ,'list_eval_weighted_3more_alpha0.25' ))
list_eval_alpha0.35 = readRDS (paste0 (savingdir,'/' ,'list_eval_weighted_3more_alpha0.35' ))
df_summary_eval = bind_rows (
summarise_eval_clusters_distMatrix (list_eval_alpha0) %>% mutate (alpha= 0 ),
summarise_eval_clusters_distMatrix (list_eval_alpha0.1 ) %>% mutate (alpha= 0.1 ),
summarise_eval_clusters_distMatrix (list_eval_alpha0.25 ) %>% mutate (alpha= 0.25 ),
summarise_eval_clusters_distMatrix (list_eval_alpha0.35 ) %>% mutate (alpha= 0.35 ))
df_summary_eval %>%
select (nclust,method,alpha,
within_between_avg,numer_within_between_avg,denom_within_between_avg) %>%
pivot_longer (cols = - c (1 ,2 ,3 )) %>% mutate (alpha = factor (alpha)) %>%
filter (nclust> 2 ) %>%
ggplot (aes (x= nclust,y= value,color= method,linetype= alpha))+
geom_line ()+
facet_wrap (~ name,scales = 'free' )+
theme_minimal ()+ theme (legend.position = 'bottom' )
Code
df_summary_eval %>%
select (nclust,method,alpha,everything ()) %>%
pivot_longer (cols = - c (1 ,2 ,3 )) %>% mutate (alpha = factor (alpha)) %>%
ggplot (aes (x= nclust,y= value,color= method,linetype= alpha))+
geom_line ()+
facet_wrap (~ name,scales = 'free' )+
theme_minimal ()+ theme (legend.position = 'bottom' )